knitr::opts_chunk$set(echo = TRUE)
######DO NOT MODIFY. This will load required packages and data.
library(tidyverse)
library(gganimate)
library(gifski)
library(plotly)
# Read the cces sample from the remote CSV. drop_na() removes every row that
# has an NA in any column, so only fully complete rows are kept.
cces <- drop_na(read_csv(url("https://www.dropbox.com/s/ahmt12y39unicd2/cces_sample_coursera.csv?raw=1")))
# Read the cel data the same way, again keeping only fully complete rows.
cel <- drop_na(read_csv(url("https://www.dropbox.com/s/4ebgnkdhhxo5rac/cel_volden_wiseman%20_coursera.csv?raw=1")))
Data: cces
Explain what you are visualizing here: Bar charts showing the approval opinions of respondents (CC18_308a), broken down by gender and education level, in the cces file
Put your figure here:
# Recode the numeric survey codes into readable labels in one pass.
# Gender stays a character column; Educ and transCC18_308a become factors with
# an explicit level order so plots list the categories in that order rather
# than alphabetically. Codes outside the listed values become NA.
data1 <- cces %>%
  mutate(
    Gender = case_when(
      gender == 2 ~ "Female",
      gender == 1 ~ "Male"
    ),
    Educ = factor(
      educ,
      levels = 1:6,
      labels = c(
        "No high school", "High school graduate", "Some college",
        "2-year", "4-year", "Post-grad"
      )
    ),
    # Codes are listed from 4 down to 1 so the levels run from
    # "Strongly disapprove" up to "Strongly approve".
    transCC18_308a = factor(
      CC18_308a,
      levels = 4:1,
      labels = c(
        "Strongly disapprove", "Somewhat disapprove",
        "Somewhat approve", "Strongly approve"
      )
    )
  )

# Respondent counts per gender / education / opinion, largest groups first.
data1.1 <- data1 %>%
  count(Gender, Educ, transCC18_308a, sort = TRUE) %>%
  rename(Education = Educ, Opinion = transCC18_308a, Count = n)

# Respondent counts per gender / opinion, largest groups first.
data1.2 <- data1 %>%
  count(Gender, transCC18_308a, sort = TRUE) %>%
  rename(Opinion = transCC18_308a, Count = n)
# Dodged bar chart of respondent counts by gender and opinion.
# data1.2 already holds one pre-computed Count per Gender/Opinion pair, so
# geom_col() (bar height taken directly from y) is the appropriate layer.
# stat_summary() without a summary function only worked by falling back to
# mean_se() and printed a "No summary function supplied" message.
a <- ggplot(data1.2, aes(x = Gender, y = Count, fill = Opinion)) +
  geom_col(position = "dodge") +
  labs(
    x = "Gender",
    y = "Number of respondents",
    title = "The opinion of respondents"
  ) +
  theme(plot.title = element_text(hjust = 0.5)) # centre the title

# Static version: print the count above each bar. The label dodge width (0.9)
# matches the default bar width so each label sits over its own bar; the
# negative vjust lifts the label just above the bar top.
a +
  geom_text(
    aes(label = Count),
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    size = 3
  )

# Interactive version of the same chart (no text labels).
ggplotly(a)
We can see that there are 4 levels of opinion: strongly disapprove, somewhat disapprove, somewhat approve, and strongly approve. For further analysis, the following graph shows the education levels of the respondents for each type of opinion:
# Dodged bar chart of respondent counts by gender and opinion, one panel per
# education level. data1.1 holds one pre-computed Count per
# Gender/Education/Opinion combination, so geom_col() draws the bars directly
# instead of relying on stat_summary()'s implicit mean_se() fallback.
a <- ggplot(data1.1, aes(x = Gender, y = Count, fill = Opinion)) +
  geom_col(position = "dodge") +
  labs(
    x = "Gender",
    y = "Number of respondents",
    title = "The opinion of respondents"
  ) +
  theme(plot.title = element_text(hjust = 0.5)) + # centre the title
  facet_wrap(~Education)

# Static version: count labels above the bars, with a fixed y range that leaves
# head-room for the labels.
# NOTE(review): ylim() drops anything outside 0-80, so a bar with Count > 80
# would disappear - confirm against the data.
a +
  geom_text(
    aes(label = Count),
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    size = 3
  ) +
  ylim(0, 80)

# Interactive version (no labels, free y range).
ggplotly(a)
Next, I will illustrate the remaining types of bar chart, which make it easier to compare proportions: the first chart shows the opinions of respondents as proportions (on a scale from 0 to 1), and the second chart shows the numbers of respondents stacked on top of one another. It is a little difficult to calculate and show percentage labels for the first and second charts, so I use interactive charts.
# Proportional ("fill") bars: within each gender the opinions are rescaled to
# sum to 1 and the axis is labelled in percent. geom_col() uses the
# pre-computed Count values directly; the previous stat_summary() call had no
# summary function and only worked through its mean_se() fallback.
ggplot(data1.1, aes(x = Gender, y = Count, fill = Opinion)) +
  geom_col(position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) +
  facet_wrap(~Education) +
  labs(
    x = "Gender",
    y = "Proportion of respondents",
    title = "The opinion of respondents"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

# Stacked bars: same data, raw counts stacked on top of one another.
ggplot(data1.1, aes(x = Gender, y = Count, fill = Opinion)) +
  geom_col(position = "stack") +
  facet_wrap(~Education) +
  labs(
    x = "Gender",
    y = "Number of respondents",
    title = "The opinion of respondents"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
For interactive plot:
# Interactive proportional ("fill") bar chart, one panel per education level.
# geom_col() draws the pre-computed Count values directly instead of going
# through stat_summary()'s implicit mean_se() fallback.
b <- ggplot(data1.1, aes(x = Gender, y = Count, fill = Opinion)) +
  geom_col(position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) +
  facet_wrap(~Education) +
  labs(
    x = "Gender",
    y = "Proportion of respondents",
    title = "The opinion of respondents"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(b)

# Interactive stacked bar chart of the raw counts.
b1 <- ggplot(data1.1, aes(x = Gender, y = Count, fill = Opinion)) +
  geom_col(position = "stack") +
  facet_wrap(~Education) +
  labs(
    x = "Gender",
    y = "Number of respondents",
    title = "The opinion of respondents"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(b1)
Let's try an animated graph.
Explain what you are visualizing here: An animated bar chart showing the number of respondents by gender and opinion, with one animation state per education level, in the cces file
Put your figure here:
# Animated dodged bar chart: one animation state per education level, with the
# current level shown in the subtitle through the {closest_state} placeholder.
# geom_col() draws the pre-computed Count values directly; the previous
# stat_summary() call had no summary function and relied on its mean_se()
# fallback.
ggplot(data1.1, aes(x = Gender, y = Count, fill = Opinion)) +
  geom_col(position = "dodge") +
  # Count above each bar; dodge width 0.9 matches the default bar width.
  geom_text(
    aes(label = Count),
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    size = 4
  ) +
  labs(
    x = "Gender",
    y = "Number of respondents",
    title = "The opinion of respondents",
    subtitle = "Education: {closest_state}"
  ) +
  # Single theme() call: centred bold title plus enlarged text everywhere.
  theme(
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
    plot.subtitle = element_text(size = 15),
    legend.title = element_text(size = 15, face = "bold.italic", colour = "black"),
    legend.text = element_text(size = 14, face = "italic", colour = "black"),
    axis.title = element_text(size = 15, colour = "black"),
    axis.text = element_text(size = 13, colour = "black")
  ) +
  # transition_length = 0: switch between education levels without tweening.
  transition_states(Education, transition_length = 0, state_length = 2) +
  enter_fade() +
  exit_fade()
# No animation argument is given, so this saves the most recently rendered
# animation; it relies on the plot above having been rendered first.
anim_save("The opinion of respondents.gif")
For interactive plot with animation:
# Animated plotly bar chart: bars of Count per Gender, coloured by Opinion,
# with one animation frame per Education level.
# The former xend / yend arguments were removed: they are segment attributes
# (see add_segments()), not bar attributes, so they had no effect on the bars.
plot_ly(data1.1) %>%
  add_bars(
    x = ~Gender,
    y = ~Count,
    color = ~Opinion,
    frame = ~Education
  ) %>%
  # Slider caption shows the current education level in red.
  animation_slider(
    currentvalue = list(prefix = "Education: ", font = list(color = "red"))
  ) %>%
  # First argument (1000) is the frame duration passed to animation_opts().
  animation_opts(1000, easing = "elastic", redraw = FALSE)
Data: cel
Explain what you are visualizing here: A scatter plot illustrating the relationship between the DW-Nominate score of each member (dwnom1) and the number of bills the member introduced in Congress 110 (all_bills), with one panel per year in which the member was elected (elected)
Put your figure here:
# Members of the 110th Congress, with the 0/1 `female` flag turned into a
# readable Gender label (any other value becomes NA).
data2 <- cel %>%
  filter(congress == 110) %>%
  mutate(
    Gender = case_when(
      female == 1 ~ "Female",
      female == 0 ~ "Male"
    )
  )

# Scatter of bills introduced against DW-Nominate score, coloured by gender,
# one panel per election year.
data2 %>%
  ggplot(aes(x = dwnom1, y = all_bills, colour = Gender)) +
  facet_wrap(~elected) +
  geom_point() +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(
    title = "The DW-Nominate score and Number of bills introduced each year",
    x = "DW-Nominate score",
    y = "Number of bills"
  )
Let's try an interactive plot:
For interactive and animation plot:
# Same scatter, prepared for ggplotly(): the `frame` aesthetic is not a ggplot2
# aesthetic (hence the warning below) but is picked up by ggplotly() to build
# animation frames from `elected`.
c1 <- data2 %>%
  ggplot(aes(x = dwnom1, y = all_bills, color = Gender)) +
  geom_point(aes(frame = elected)) +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(
    title = "The DW-Nominate score and Number of bills introduced each year",
    x = "DW-Nominate score",
    y = "Number of bills"
  )
## Warning: Ignoring unknown aesthetics: frame
ggplotly(c1)
## Warning in p$x$data[firstFrame] <- p$x$frames[[1]]$data: number of items to
## replace is not a multiple of replacement length
A problem arises when we plot the graph with a different colour for each gender. A similar problem has already been reported in many topics, and no solution has been found (https://community.plotly.com/t/frame-showing-less-category-than-actual/8628). The warning message indicates that a frame shows fewer categories than expected.
Only female or only male members are shown (from year 1986).
However, if I replace aes(color = Gender) with aes(ids/label = Gender, text1 = …), each frame can show both genders at the same time (of course, with no colour).
# Variant without colour: Gender is passed as `ids`, and the member name and
# seniority are passed as extra aesthetics (text1 / text2) on the point layer.
c2 <- data2 %>%
  ggplot(aes(x = dwnom1, y = all_bills)) +
  geom_point(
    aes(
      frame = elected,
      ids = Gender,
      text1 = thomas_name,
      text2 = seniority
    )
  ) +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(
    title = "The DW-Nominate score and Number of bills introduced each year",
    x = "DW-Nominate score",
    y = "Number of bills"
  )
ggplotly(c2)
Explain what you are visualizing here: An (interactive) line chart illustrating the mean number of bills introduced in Congress 110 (all_bills) by the year in which the member was elected (elected)
Put your figure here:
# Members of the 110th Congress, with the 0/1 `female` flag turned into a
# readable Gender label (any other value becomes NA).
data3 <- cel %>%
  filter(congress == 110) %>%
  mutate(
    Gender = case_when(
      female == 0 ~ "Male",
      female == 1 ~ "Female"
    )
  )

# Mean number of bills per election year, drawn as points joined by a line,
# one panel per gender.
# `fun = mean` replaces the deprecated `fun.y = mean` argument of
# stat_summary().
ggplot(data3, aes(x = elected, y = all_bills, colour = Gender)) +
  stat_summary(fun = mean, geom = "point") +
  # group = 1 joins all yearly means within a panel into a single line.
  stat_summary(fun = mean, geom = "line", aes(group = 1)) +
  labs(
    x = "Elected Year",
    y = "Mean of bills",
    title = "The mean value of bills of the members in each elected year"
  ) +
  # theme_bw() is a complete theme, so it must come before the theme() tweaks;
  # otherwise it would reset them.
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  ) +
  facet_wrap(~Gender, as.table = FALSE)
Let's check some values by filtering on the "Elected Year" values 1990, 1992 and 1994.
# pastecs supplies stat.desc(), used for the descriptive statistics below.
library(pastecs)
# Members first elected in 1990; descriptive statistics of their bill counts,
# computed separately for each gender.
year1990 <- data3 %>% filter(elected == 1990)
by(year1990$all_bills, year1990$Gender, FUN = stat.desc)
## year1990$Gender: Female
## nbr.val nbr.null nbr.na min max range
## 2.0000000 0.0000000 0.0000000 31.0000000 36.0000000 5.0000000
## sum median mean SE.mean CI.mean.0.95 var
## 67.0000000 33.5000000 33.5000000 2.5000000 31.7655118 12.5000000
## std.dev coef.var
## 3.5355339 0.1055383
## ------------------------------------------------------------
## year1990$Gender: Male
## nbr.val nbr.null nbr.na min max range
## 13.0000000 0.0000000 0.0000000 3.0000000 33.0000000 30.0000000
## sum median mean SE.mean CI.mean.0.95 var
## 150.0000000 11.0000000 11.5384615 2.2999871 5.0112415 68.7692308
## std.dev coef.var
## 8.2927216 0.7187025
# Members first elected in 1992; descriptive statistics of their bill counts,
# computed separately for each gender.
year1992 <- data3 %>% filter(elected == 1992)
by(year1992$all_bills, year1992$Gender, FUN = stat.desc)
## year1992$Gender: Female
## nbr.val nbr.null nbr.na min max range
## 8.0000000 0.0000000 0.0000000 7.0000000 74.0000000 67.0000000
## sum median mean SE.mean CI.mean.0.95 var
## 213.0000000 20.5000000 26.6250000 7.8078200 18.4625606 487.6964286
## std.dev coef.var
## 22.0838499 0.8294404
## ------------------------------------------------------------
## year1992$Gender: Male
## nbr.val nbr.null nbr.na min max range
## 36.0000000 0.0000000 0.0000000 2.0000000 52.0000000 50.0000000
## sum median mean SE.mean CI.mean.0.95 var
## 632.0000000 15.0000000 17.5555556 2.2040527 4.4744649 174.8825397
## std.dev coef.var
## 13.2243162 0.7532838
# Members first elected in 1994; descriptive statistics of their bill counts,
# computed separately for each gender.
year1994 <- data3 %>% filter(elected == 1994)
by(year1994$all_bills, year1994$Gender, FUN = stat.desc)
## year1994$Gender: Female
## nbr.val nbr.null nbr.na min max range
## 4.0000000 0.0000000 0.0000000 14.0000000 41.0000000 27.0000000
## sum median mean SE.mean CI.mean.0.95 var
## 108.0000000 26.5000000 27.0000000 5.9581876 18.9616123 142.0000000
## std.dev coef.var
## 11.9163753 0.4413472
## ------------------------------------------------------------
## year1994$Gender: Male
## nbr.val nbr.null nbr.na min max range
## 26.0000000 0.0000000 0.0000000 1.0000000 53.0000000 52.0000000
## sum median mean SE.mean CI.mean.0.95 var
## 355.0000000 11.0000000 13.6538462 2.0523475 4.2268888 109.5153846
## std.dev coef.var
## 10.4649598 0.7664478
# Interactive version of the mean-bills line chart: yearly means drawn as
# points joined by a line, one panel per gender.
# `fun = mean` replaces the deprecated `fun.y = mean` argument of
# stat_summary().
d <- ggplot(data3, aes(x = elected, y = all_bills, colour = Gender)) +
  stat_summary(fun = mean, geom = "point") +
  # group = 1 joins all yearly means within a panel into a single line.
  stat_summary(fun = mean, geom = "line", aes(group = 1)) +
  labs(
    x = "Elected Year",
    y = "Mean of bills",
    title = "The mean value of bills of the members in each elected year"
  ) +
  # theme_bw() is a complete theme, so it must come before the theme() tweaks;
  # otherwise it would reset them.
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  ) +
  facet_wrap(~Gender, as.table = FALSE)
ggplotly(d)
Drawing a box plot to identify the median, minimum and maximum of these values:
# Box plots of the number of bills per election year, one panel per gender.
# `elected` is a continuous variable, so without an explicit group ggplot2
# pools every year into a single box per panel; group = elected gives one box
# per election year.
# The axis label and title were copied from the mean line chart; they now
# describe what a box plot shows (the distribution, not the mean).
d <- ggplot(data3, aes(x = elected, y = all_bills, colour = Gender)) +
  geom_boxplot(aes(group = elected)) +
  labs(
    x = "Elected Year",
    y = "Number of bills",
    title = "The distribution of bills of the members in each elected year"
  ) +
  # theme_bw() is a complete theme, so it must come before the theme() tweaks;
  # otherwise it would reset them.
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  ) +
  facet_wrap(~Gender, as.table = FALSE)
ggplotly(d)
A few comments on the interactive box plot and line graph:
1. No value for the mean number of bills is shown on the interactive graph.
2. The box plot only shows boxes for a few years (not for all of the data).
As we can see, the mean values for female members elected in 1992 and 1994 are 26.625 and 27, but they cannot be seen on the interactive line graph. In my opinion, these values are supposed to be shown.
Explain what you are visualizing here: A box plot illustrating the number of bills (all_bills) that members introduced in Congress 110, by party, with separate panels for female and male members
Put your figure here:
# Members of the 110th Congress with readable labels for the 0/1 `female` and
# `dem` flags (any other value becomes NA).
data4 <- cel %>%
  filter(congress == 110) %>%
  mutate(
    Gender = case_when(
      female == 1 ~ "Female",
      female == 0 ~ "Male"
    ),
    Dem = case_when(
      dem == 1 ~ "Democrat",
      dem == 0 ~ "Republican"
    )
  )

# Box plot of bills introduced per party, one panel per gender. The legend is
# hidden because the fill colour repeats the x axis.
g <- data4 %>%
  ggplot(aes(x = Dem, y = all_bills, fill = Dem)) +
  geom_boxplot() +
  facet_wrap(~Gender, as.table = FALSE) +
  labs(
    title = "The number of bills introduced in congress 110",
    x = "Political view",
    y = "The number of bills"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )
# Static plot first, then the interactive version.
g
ggplotly(g)